## ---------------------------
##
## Script name: process_demographics_Chicago.R
##
## Description: Cleans and joins demographic variables for 5 decades in Chicago. 
## All data downloaded from https://SocialExplorer.com. Demographics 
## for each 10-year census are gathered, and then the years in 
## between are imputed based on the change between those years. 
## The year 2020 is assigned the same values as 2019 since
## 2020 data was not released when this analysis was
## completed.
##
## ---------------------------
## DEPENDENCIES

library(dplyr)

## ---------------------------


process_demographics_Chicago <- function(file_locations, population_cutoff, overwrite=FALSE) {
  #'
  #'@description Calls a function to clean and format raw
  #'demographics files, unions them together, and fills
  #'in population data for the years in between the decades.
  #'Tracts whose minimum population falls below the cutoff are
  #'dropped, and the poverty/majority-race bins computed from the
  #'5-year rolling mean are joined onto the yearly values.
  #'If the processed file already exists and overwrite is FALSE,
  #'nothing is loaded or computed.
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R
  #'
  #'@param population_cutoff integer. The lowest population value
  #'that a tract is allowed to have in the entire 56-year
  #'period. Otherwise, it is filtered out.
  #'
  #'@param overwrite boolean. Determines if already processed 
  #'file should be overwritten.
  #'
  #'@return saves the clean output and returns NULL (invisibly)
  
  output_fp <- paste(file_locations$current_fp, file_locations$Chicago$Demographics$processed, sep="/")
  
  # Decide up front whether anything will be written: the processing
  # below is only useful for its output file, so skip it entirely when
  # the file is already there and must not be overwritten.
  if (file.exists(output_fp) && !overwrite) {
    return(invisible(NULL))
  }
  
  decade_list <- .format_raw_demographics_data(file_locations)
  
  t1970 <- decade_list$`1970`
  t1980 <- decade_list$`1980`
  t1990 <- decade_list$`1990`
  t2000 <- decade_list$`2000`
  t2010 <- decade_list$`2010`
  t2019 <- decade_list$`2019`

  #--------------------------------
  # Stack all census snapshots. The 1970 table is used as returned;
  # every later table gets its year (re)assigned here.
  ct_vars <- t1970 %>%
    union_all(t1980 %>% mutate(year = 1980)) %>%
    union_all(t1990 %>% mutate(year = 1990)) %>%
    union_all(t2000 %>% mutate(year = 2000)) %>%
    union_all(t2010 %>% mutate(year = 2010)) %>%
    union_all(t2019 %>% mutate(year = 2019))
  
  # Fill in population between decennial years
  ct_vars <- .fill_missing_population(ct_vars, id_col="tract_2010", cols_to_impute=c("total_population","total_population_white","total_population_black",
                                                                                     "total_population_other","total_below_pov_level"),
                                      years_to_include=seq(1965, 2020))
  
  # Keep only tracts whose smallest yearly population reaches the cutoff.
  # NOTE: the data stays grouped by tract_2010 after this step (there is
  # no ungroup()), so the helper below receives a grouped data frame.
  ct_vars <- ct_vars %>%
    group_by(tract_2010) %>%
    mutate(lowest_pop_value = min(total_population)) %>%
    filter(lowest_pop_value >= population_cutoff) %>%
    dplyr::select(-lowest_pop_value)
  
  # compute the 5-year rolling mean for each year to avoid
  # noise due to year-to-year changes
  ct_vars_avg <- .generate_rolling_5yrmean_demographics(ct_vars)
  ct_vars_avg <- ct_vars_avg %>%
    dplyr::select(tract_2010, year, poverty_bin, majority_race_bin)
  
  # join both the regular numbers and the rolling mean numbers
  ct_vars <- ct_vars %>%
    inner_join(ct_vars_avg, by=c("tract_2010","year"))
  
  write.csv(ct_vars, output_fp, row.names=FALSE)
  invisible(NULL)
}



.format_raw_demographics_data <- function(file_locations) {
  #'
  #'@description Runs every raw demographics extract through
  #'.process_demo_file, mapping the year-specific raw column
  #'names onto the shared population / poverty columns, and
  #'renames the FIPS key to tract_2010. The 2010 table is
  #'assembled from two files (race counts and poverty counts).
  #'
  #'@param file_locations list. File locations loaded from
  #'file_locations.R
  #'
  #'@return named list ("1970", "1980", "1990", "2000", "2010",
  #'"2019") of demographics dataframes. Only the 1970-2000
  #'tables carry a year column; the 2010 and 2019 tables do not.

  raw_files <- file_locations$Chicago$Demographics$raw

  # Full path of a raw file, given its location relative to current_fp
  raw_path <- function(relative_fp) {
    paste(file_locations$current_fp, relative_fp, sep="/")
  }

  # Stamp the census year, rename the FIPS key and keep only the
  # key, the year and the "total*" columns
  finalize_decade <- function(decade_df, census_year) {
    decade_df %>%
      mutate(year=census_year) %>%
      rename(tract_2010 = fips) %>%
      select(tract_2010, year, starts_with("total"))
  }

  #-------------------------- A. 1970
  t1970 <- .process_demo_file(
    filepath=raw_path(raw_files$`1970`),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col="White",
    black_pop_col="Black",
    other_race_col="Other",
    total_pov_responses_col="Families",
    below_pov_col="Families: Below Poverty Level"
  ) %>%
    finalize_decade(1970)

  #-------------------------- B. 1980
  t1980 <- .process_demo_file(
    filepath=raw_path(raw_files$`1980`),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col=c(
      "Total Population: Persons not of Spanish Origin: White",
      "Total Population: Persons of Spanish Origin: White"
    ),
    black_pop_col=c(
      "Total Population: Persons not of Spanish Origin: Black",
      "Total Population: Persons of Spanish Origin: Black"
    ),
    other_race_col=c(
      "Total Population: Persons not of Spanish Origin: Asian, Pacific Islander, American Indian, Eskimo, Aleut",
      "Total Population: Persons not of Spanish Origin: Other",
      "Total Population: Persons of Spanish Origin: American Indian, Eskimo, Aleut, and Asian and Pacific Islander",
      "Total Population: Persons of Spanish Origin: Other"
    ),
    total_pov_responses_col="Population for Whom Poverty Status is Determined",
    below_pov_col="Population for Whom Poverty Status is Determined: Below Poverty Level"
  ) %>%
    finalize_decade(1980)

  #-------------------------- C. 1990
  t1990 <- .process_demo_file(
    filepath=raw_path(raw_files$`1990`),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col=c(
      "Total Population: Non-Hispanic: White",
      "Total Population: Hispanic: White"
    ),
    black_pop_col=c(
      "Total Population: Non-Hispanic: Black",
      "Total Population: Hispanic: Black"
    ),
    other_race_col=c(
      "Total Population: Non-Hispanic: American Indian, Eskimo, or Aleut",
      "Total Population: Non-Hispanic: Asian or Pacific Islander",
      "Total Population: Non-Hispanic: Other race",
      "Total Population: Hispanic: American Indian, Eskimo, or Aleut",
      "Total Population: Hispanic: Asian or Pacific Islander",
      "Total Population: Hispanic: Other race"
    ),
    total_pov_responses_col="Persons for whom poverty status is determined",
    below_pov_col="Persons for whom poverty status is determined: Income in 1989 below poverty level"
  ) %>%
    finalize_decade(1990)

  #-------------------------- D. 2000
  t2000 <- .process_demo_file(
    filepath=raw_path(raw_files$`2000`),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col=c(
      "Not Hispanic or Latino: White Alone",
      "Hispanic or Latino: White Alone"
    ),
    black_pop_col=c(
      "Not Hispanic or Latino: Black or African American Alone",
      "Hispanic or Latino: Black or African American Alone"
    ),
    other_race_col=c(
      "Not Hispanic or Latino: American Indian and Alaska Native Alone",
      "Not Hispanic or Latino: Asian Alone",
      "Not Hispanic or Latino: Native Hawaiian and Other Pacific Islander Alone",
      "Not Hispanic or Latino: Some Other Race Alone",
      "Not Hispanic or Latino: Two or More Races",
      "Hispanic or Latino: American Indian and Alaska Native Alone",
      "Hispanic or Latino: Asian Alone",
      "Hispanic or Latino: Native Hawaiian and Other Pacific Islander Alone",
      "Hispanic or Latino: Some Other Race Alone",
      "Hispanic or Latino: Two or More Races"
    ),
    total_pov_responses_col=c("Male Population","Female Population"),
    below_pov_col=c("Male Population: Below Poverty Level","Female Population: Below Poverty Level")
  ) %>%
    finalize_decade(2000)

  #-------------------------- E. 2010
  # i. poverty counts only (no population columns requested)
  acs2010 <- .process_demo_file(
    filepath=raw_path(raw_files$`2010`$poverty),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col=NULL,
    white_pop_col=NULL,
    black_pop_col=NULL,
    other_race_col=NULL,
    total_pov_responses_col="Population for Whom Poverty Status Is Determined",
    below_pov_col="Population for Whom Poverty Status Is Determined: Income in the Past 12 Months Below Poverty Level"
  )

  # ii. race counts only (no poverty columns requested)
  c2010 <- .process_demo_file(
    filepath=raw_path(raw_files$`2010`$race),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col=c(
      "Total population: Not Hispanic or Latino: White alone",
      "Total population: Hispanic or Latino: White alone"
    ),
    black_pop_col=c(
      "Total population: Not Hispanic or Latino: Black or African American alone",
      "Total population: Hispanic or Latino: Black or African American alone"
    ),
    other_race_col=c(
      "Total population: Not Hispanic or Latino: American Indian and Alaska Native alone",
      "Total population: Not Hispanic or Latino: Asian alone",
      "Total population: Not Hispanic or Latino: Native Hawaiian and Other Pacific Islander alone",
      "Total population: Not Hispanic or Latino: Some Other Race alone",
      "Total population: Not Hispanic or Latino: Two or More Races",
      "Total population: Hispanic or Latino: American Indian and Alaska Native alone",
      "Total population: Hispanic or Latino: Asian alone",
      "Total population: Hispanic or Latino: Native Hawaiian and Other Pacific Islander alone",
      "Total population: Hispanic or Latino: Some Other Race alone",
      "Total population: Hispanic or Latino: Two or More Races"
    ),
    total_pov_responses_col=NULL,
    below_pov_col=NULL
  )

  # iii. combine the two files, then scale the below-poverty share of
  # the poverty universe up to the tract's total population (0 when the
  # poverty universe is empty)
  t2010 <- c2010 %>%
    inner_join(acs2010, by=c("fips", "ct_number")) %>%
    rename(tract_2010 = fips) %>%
    select(-ct_number) %>%
    mutate(total_below_pov_level = ifelse(total_pov_responses > 0, (below_pov_level*total_population)/total_pov_responses, 0)) %>%
    select(-total_pov_responses, -below_pov_level)

  #-------------------------- F. 2019
  t2019 <- .process_demo_file(
    filepath=raw_path(raw_files$`2019`),
    fips_col="FIPS",
    ct_num_col="Census Tract",
    total_pop_col="Total Population",
    white_pop_col=c(
      "Total Population: Not Hispanic or Latino: White Alone",
      "Total Population: Hispanic or Latino: White Alone"
    ),
    black_pop_col=c(
      "Total Population: Not Hispanic or Latino: Black or African American Alone",
      "Total Population: Hispanic or Latino: Black or African American Alone"
    ),
    other_race_col=c(
      "Total Population: Not Hispanic or Latino: American Indian and Alaska Native Alone",
      "Total Population: Not Hispanic or Latino: Asian Alone",
      "Total Population: Not Hispanic or Latino: Native Hawaiian and Other Pacific Islander Alone",
      "Total Population: Not Hispanic or Latino: Some Other Race Alone",
      "Total Population: Not Hispanic or Latino: Two or More Races",
      "Total Population: Hispanic or Latino: American Indian and Alaska Native Alone",
      "Total Population: Hispanic or Latino: Asian Alone",
      "Total Population: Hispanic or Latino: Native Hawaiian and Other Pacific Islander Alone",
      "Total Population: Hispanic or Latino: Some Other Race Alone",
      "Total Population: Hispanic or Latino: Two or More Races"
    ),
    total_pov_responses_col="Population for Whom Poverty Status Is Determined",
    below_pov_col="Population for Whom Poverty Status Is Determined: Income in the Past 12 Months Below Poverty Level"
  ) %>%
    rename(tract_2010 = fips) %>%
    select(-ct_number)

  list("1970" = t1970,
       "1980" = t1980,
       "1990" = t1990,
       "2000" = t2000,
       "2010" = t2010,
       "2019" = t2019)
}


.process_demo_file <- function(filepath,
                              fips_col=NULL,
                              ct_num_col=NULL,
                              total_pop_col=NULL,
                              white_pop_col=NULL,
                              black_pop_col=NULL,
                              other_race_col=NULL,
                              total_pov_responses_col=NULL,
                              below_pov_col=NULL,
                              file_locations=NULL) {
  #'
  #'@description Helper function that takes column names
  #'from the raw demographic files and calculates the total
  #'population values and changes to consistent column names.
  #'Output columns (created only for the inputs supplied) are
  #'fips, ct_number, total_population, total_population_white,
  #'total_population_black, total_population_other,
  #'total_pov_responses and below_pov_level. When the total
  #'population and both poverty inputs are supplied, the two
  #'poverty columns are replaced by total_below_pov_level.
  #'Rows are finally restricted to Chicago tracts.
  #'
  #'@param filepath character. filepath to the raw data
  #'@param fips_col character. raw column name with census
  #'tract FIPS code. Effectively required: the final Chicago
  #'filter is applied on the resulting fips column.
  #'@param ct_num_col character. raw column name with census
  #'tract number.
  #'@param total_pop_col character or vector. column(s) with 
  #'total population of census tract. If vector, columns are
  #'added together.
  #'@param white_pop_col character or vector. column(s) with 
  #'white population of census tract. If vector, columns are
  #'added together.
  #'@param black_pop_col character or vector. column(s) with 
  #'Black population of census tract. If vector, columns are
  #'added together.
  #'@param other_race_col character or vector. column(s) with 
  #'population of all other races of census tract. If vector, 
  #'columns are added together.
  #'@param total_pov_responses_col character or vector. column(s) with 
  #'number of responses to question of poverty in census tract. 
  #'If vector, columns are added together.
  #'@param below_pov_col character or vector. column(s) with 
  #'number below poverty level out of people who responded to 
  #'question of poverty in census tract. If vector, 
  #'columns are added together.
  #'@param file_locations list or NULL. File locations loaded
  #'from file_locations.R, used to find the processed tract
  #'geographies. When NULL (default), the file_locations object
  #'visible from the scope this function was defined in is used.
  #'
  #'@return dataframe with census tract codes and population info.

  # Resolve file_locations before doing any I/O. The fallback performs
  # the same enclosing-scope lookup an undeclared free variable would.
  if (is.null(file_locations)) {
    file_locations <- get("file_locations", envir = parent.env(environment()))
  }
  
  # load raw data
  # NOTE(review): read_csv / read_sf are not attached by this file's own
  # library() calls - presumably readr and sf are loaded by the calling
  # script; confirm.
  df <- read_csv(filepath)
  # Drop the first data row (presumably a secondary header row in the
  # export - confirm). slice(-1) stays correct for a one-row file, where
  # 2:nrow(df) would count downwards and index rows 2 and 1.
  df <- df %>% slice(-1)
    
  cols <- list(list(input=fips_col,output="fips"),
               list(input=ct_num_col,output="ct_number"),
               list(input=total_pop_col,output="total_population"),
               list(input=white_pop_col,output="total_population_white"),
               list(input=black_pop_col,output="total_population_black"),
               list(input=other_race_col,output="total_population_other"),
               list(input=total_pov_responses_col,output="total_pov_responses"),
               list(input=below_pov_col,output="below_pov_level"))
  
  # Every requested raw column except the two identifier columns holds
  # counts and is converted to numeric.
  numeric_cols <- setdiff(c(total_pop_col, white_pop_col, black_pop_col, other_race_col,
                            total_pov_responses_col, below_pov_col),
                          c(fips_col, ct_num_col))
  if (length(numeric_cols) > 0) {
    df[, numeric_cols] <- lapply(numeric_cols, function(x) as.numeric(df[[x]]))
  }

  # Create one standardized output column per supplied input; several
  # raw columns for the same output are summed row-wise.
  cols_to_select <- character(0)
  for (col in cols) {
    
    if (is.null(col$input)) {
      next
    }
    
    if (length(col$input) > 1) {
      df[, col$output] <- rowSums(df[, col$input])
    } else {
      df[, col$output] <- df[, col$input]
    }
    
    cols_to_select <- c(cols_to_select, col$output)
  }
  
  df <- df[,cols_to_select]

  # Scale the below-poverty share of the poverty universe up to the
  # total population; tracts with an empty poverty universe get 0.
  if (!is.null(total_pov_responses_col) && !is.null(below_pov_col) && !is.null(total_pop_col)) {
    df <- df %>%
      mutate(total_below_pov_level = ifelse(total_pov_responses > 0, (below_pov_level*total_population)/total_pov_responses, 0)) %>%
      select(-total_pov_responses, -below_pov_level)
  }
  
  # Keep only tracts listed for Chicago in the processed geographies file
  city_tracts <- read_sf(paste(file_locations$current_fp, file_locations$Nation$Geographies$processed, sep="/")) %>%
    filter(city=="Chicago")

  df <- df %>%
    filter(fips %in% city_tracts$tract_2010)

  return(df)
  
}
